Production over Station and Season
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle") &
norep_filter_name != "MOTEJ515"),
aes(x = lakesite, y = log10(as.numeric(fraction_bac_abund)), fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Log10(Bacterial Cells/mL)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)

prod1 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(axis.text.x = element_blank(), #element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
prod2 <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(. ~ season) +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF TOTAL PRODUCTIVITY
ggplot(dplyr::filter(metadata, year == "2015" & fraction == "Free"),
aes(x = lakesite, y = tot_bacprod)) +
geom_bar(stat = "identity", color = "black", fill = "grey", position=position_dodge()) +
geom_errorbar(aes(ymin = tot_bacprod - SD_tot_bacprod, ymax = tot_bacprod + SD_tot_bacprod),width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)

########## LONG FORMAT OF COMMUNITY PRODUCTIVITY
community_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = frac_bacprod, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod),
position = position_dodge(width = 0.75), width = 0.25) +
facet_grid(season ~., scales = "free") +
scale_y_continuous(expand = c(0,0), limits = c(0, 80),
breaks = seq(from = 0, to = 80, by = 15)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Community Production\n(ug C/L/Hr)") +
theme(legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")) #top, right, bottom, and left margins)
########## LONG FORMAT OF CELLULAR PRODUCTIVITY
percapita_prod_plot_long <-
ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free", "Particle")),
aes(x = lakesite, y = fracprod_per_cell_noinf, fill = fraction)) +
geom_bar(stat = "identity", color = "black", position=position_dodge()) +
facet_grid(season ~ ., scales = "free_x") +
scale_y_continuous(expand = c(0,0)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Cellular Production \n(ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = "bottom", legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(community_prod_plot_long, percapita_prod_plot_long,
labels = c("A", "B"),
nrow = 1, ncol = 2,
align = "v")

plot_grid(prod1, prod2, labels = c("A", "B"),
rel_heights = c(1, 1.3),
nrow = 2, ncol = 1,
align = "v")

### BY STATION
prod_by_station <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = lakesite, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Lake Station") + ylab("Total Production (ug C/cell/Hr)") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
prod_by_season <- ggplot(dplyr::filter(metadata, year == "2015" & fraction %in% c("Free")),
aes(x = season, y = tot_bacprod)) +
geom_boxplot(fill = "grey") + geom_point(size = 3.5, position = position_jitter()) +
scale_y_continuous(expand = c(0,0), limits = c(0,100)) +
scale_fill_manual(values = fraction_colors) +
guides(fill=guide_legend(nrow=1,byrow=TRUE)) +
xlab("Season") +
theme(axis.text.x = element_text(angle = 30, vjust = 1, hjust = 1),
axis.title.y = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank(),
plot.margin = unit(c(0.25, 0.25, 0.25, 0.5), "cm")) #top, right, bottom, and left margins)
plot_grid(prod_by_station, prod_by_season, labels = c("A", "B"),
rel_widths = c(1, 0.75),
nrow = 1, ncol = 2,
align = "h")

scaled_surface_PAFLA_pruned_rm2 <- scale_reads(surface_PAFL_otu_pruned_notree_rm2)
set.seed(777)
# Calculate the SOREN distance
soren_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = TRUE) # Make it presence/absence
p1 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = soren_whole_dist,
color = "fraction",
shape = "season",
title = "Sørensen") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "none")
# Calculate the BRAY-CURTIS distance
bray_whole_dist <- ordinate(
physeq = scaled_surface_PAFLA_pruned_rm2,
method = "PCoA",
distance = "bray",
binary = FALSE) # Make it Abundance weighted
p2 <- plot_ordination(
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "season",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "bottom", legend.title = element_blank())
p_lakesite <-
plot_ordination(
axes = 2:3,
physeq = scaled_surface_PAFLA_pruned_rm2,
ordination = bray_whole_dist ,
color = "fraction",
shape = "lakesite",
title = "Bray-Curtis") +
geom_point(size=5, alpha = 0.7, aes(fill =fraction, color = fraction)) +
scale_colour_manual(values = fraction_colors) +
scale_size_manual(values = c(3.5, 3.5, 3.5, 3.5)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = lakesite_shapes) +
theme(legend.position = "bottom", legend.title = element_blank())
### PCOA FIGURE
pcoa_fig_row1 <- plot_grid(p1,
p2 + theme(legend.position = "none"),
labels = c("A", "B"), align = "h", ncol = 2,
rel_widths = c(1, 1))
season_legend <- get_legend(p2)
lakesite_legend <- get_legend(p_lakesite)
plot_grid(pcoa_fig_row1, season_legend,
nrow = 2, ncol = 1,
rel_heights = c(1, 0.05))

# Calculate bray curtis distance matrix
musk_bray <- phyloseq::distance(scaled_surface_PAFLA_pruned_rm2, method = "bray")
# make a data frame from the sample_data
sampledf <- data.frame(sample_data(scaled_surface_PAFLA_pruned_rm2))
# Adonis test
adonis(musk_bray ~ fraction + season , data = sampledf)
##
## Call:
## adonis(formula = musk_bray ~ fraction + season, data = sampledf)
##
## Permutation: free
## Number of permutations: 999
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## fraction 1 1.3903 1.39032 16.2279 0.30094 0.001 ***
## season 2 1.5161 0.75804 8.8479 0.32816 0.001 ***
## Residuals 20 1.7135 0.08567 0.37089
## Total 23 4.6199 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear Models with Environmental Variables
pca_scores_df <- summary(pca_environ)$sites %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "norep_filter_name") %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
dplyr::select(-norep_filter_name)
metadata_pca <- metadata %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1, 4), substr(norep_filter_name, 6, 8), sep = "")) %>%
left_join(pca_scores_df, by = "norep_water_name")
##### SINGLE REGRESSION with PC1
## Free living
comm_lm_PC1_free <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC1_part <- summary(lm(frac_bacprod ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC1_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC1_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC1,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm_row1 <- c("Free", "Per-Liter","PC1",
round(comm_lm_PC1_free$adj.r.squared, digits = 3),
round(comm_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row2 <- c("Particle", "Per-Liter","PC1",
round(comm_lm_PC1_part$adj.r.squared, digits = 3),
round(comm_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_row3 <- c("Free", "Per-Capita","PC1",
round(percap_lm_PC1_free$adj.r.squared, digits = 3),
round(percap_lm_PC1_free$coefficients[2,4], digits = 3))
pca_lm_row4 <- c("Particle", "Per-Capita", "PC1",
round(percap_lm_PC1_part$adj.r.squared, digits = 3),
round(percap_lm_PC1_part$coefficients[2,4], digits = 3))
pca_lm_results_df <-
rbind(pca_lm_row1, pca_lm_row2, pca_lm_row3, pca_lm_row4)
colnames(pca_lm_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm_results_df) = NULL
pander(pca_lm_results_df,
caption = "Single Linear regression statistics for PC1.")
Single Linear regression statistics for PC1.
| Free |
Per-Liter |
PC1 |
0.06 |
0.221 |
| Particle |
Per-Liter |
PC1 |
-0.075 |
0.641 |
| Free |
Per-Capita |
PC1 |
0.014 |
0.307 |
| Particle |
Per-Capita |
PC1 |
0.006 |
0.33 |
##### SINGLE REGRESSION with PC2
## Free living
comm_lm_PC2_free <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## Particle-associated
comm_lm_PC2_part <- summary(lm(frac_bacprod ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
## PER CAPITA: Free living
percap_lm_PC2_free <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Free")))
## PER CAPITA: Particle-associated
percap_lm_PC2_part <- summary(lm(log10(fracprod_per_cell_noinf) ~ PC2,
data = filter(metadata_pca, fraction == "Particle")))
# Put the results all together
pca_lm2_row1 <- c("Free", "Per-Liter","PC2",
round(comm_lm_PC2_free$adj.r.squared, digits = 3),
round(comm_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row2 <- c("Particle", "Per-Liter","PC2",
round(comm_lm_PC2_part$adj.r.squared, digits = 3),
round(comm_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_row3 <- c("Free", "Per-Capita","PC2",
round(percap_lm_PC2_free$adj.r.squared, digits = 3),
round(percap_lm_PC2_free$coefficients[2,4], digits = 3))
pca_lm2_row4 <- c("Particle", "Per-Capita", "PC2",
round(percap_lm_PC2_part$adj.r.squared, digits = 3),
round(percap_lm_PC2_part$coefficients[2,4], digits = 3))
pca_lm2_results_df <-
rbind(pca_lm2_row1, pca_lm2_row2, pca_lm2_row3, pca_lm2_row4)
colnames(pca_lm2_results_df) <- c("fraction", "Predictor","Prod_measure", "Adjusted_R2", "p-value")
row.names(pca_lm2_results_df) = NULL
pander(pca_lm2_results_df,
caption = "Single Linear regression statistics for PC2.")
Single Linear regression statistics for PC2.
| Free |
Per-Liter |
PC2 |
-0.056 |
0.531 |
| Particle |
Per-Liter |
PC2 |
0.207 |
0.077 |
| Free |
Per-Capita |
PC2 |
0.033 |
0.268 |
| Particle |
Per-Capita |
PC2 |
0.393 |
0.023 |
##### MULTIPLE REGRESSION
## Free living
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.792 -10.435 -7.571 12.769 30.622
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.058 5.049 4.765 0.00102 **
## PC1 6.185 4.880 1.268 0.23676
## PC2 -3.262 4.880 -0.669 0.52058
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.49 on 9 degrees of freedom
## Multiple R-squared: 0.1858, Adjusted R-squared: 0.004856
## F-statistic: 1.027 on 2 and 9 DF, p-value: 0.3966
## Particle-associated
summary(lm(frac_bacprod ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0696 -4.7286 -0.1293 3.1707 15.5457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.958 2.198 4.531 0.00142 **
## PC1 1.147 2.124 0.540 0.60246
## PC2 -4.032 2.124 -1.898 0.09014 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.613 on 9 degrees of freedom
## Multiple R-squared: 0.302, Adjusted R-squared: 0.1469
## F-statistic: 1.947 on 2 and 9 DF, p-value: 0.1983
## PER CAPITA: Free living
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5522 -0.2055 -0.1250 0.2276 0.5222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5750 0.1109 -68.318 1.56e-13 ***
## PC1 0.1176 0.1072 1.097 0.301
## PC2 -0.1269 0.1072 -1.184 0.267
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3841 on 9 degrees of freedom
## Multiple R-squared: 0.2245, Adjusted R-squared: 0.05219
## F-statistic: 1.303 on 2 and 9 DF, p-value: 0.3185
## PER CAPITA: Particle-associated
summary(lm(log10(fracprod_per_cell_noinf) ~ PC1 + PC2,
data = filter(metadata_pca, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ PC1 + PC2, data = filter(metadata_pca,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52230 -0.13916 -0.02677 0.17348 0.63461
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.6647 0.1112 -59.946 6.66e-12 ***
## PC1 0.2069 0.1081 1.913 0.0921 .
## PC2 -0.3653 0.1096 -3.332 0.0103 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3643 on 8 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6253, Adjusted R-squared: 0.5317
## F-statistic: 6.676 on 2 and 8 DF, p-value: 0.01971
PC1_lm_plot <-
ggplot(metadata_pca, aes(x = PC1, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
PC2_lm_plot <-
ggplot(metadata_pca, aes(x = PC2, y = log10(fracprod_per_cell_noinf),
color = fraction, fill = fraction)) +
geom_point() + ylab("log10(cellular production)") +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm") +
theme(legend.title = element_blank(), legend.position = "bottom")
plot_grid(PC2_lm_plot, PC2_lm_plot,
labels = c("A", "B"),
nrow = 1, ncol = 2)

par(oma(0.1, 0.1, 0.1, 0.1))
## Error in oma(0.1, 0.1, 0.1, 0.1): could not find function "oma"
biplot(pca_environ,
xlab = paste("PC1", paste(round(summary(pca_environ)$cont$importance[2,1]*100, digits = 2), "%", sep = ""), sep = ": "),
ylab = paste("PC2", paste(round(summary(pca_environ)$cont$importance[2,2]*100, digits = 2), "%", sep = ""), sep = ": "))

ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Counts) \n (cells/mL)") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
## Error in fortify(data): object 'dat1' not found
Figure 1
######################################################### Fraction ABUNDANCe
frac_abund_wilcox <- wilcox.test(log10(as.numeric(fraction_bac_abund)) ~ fraction, data = metadata)
frac_abund_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(as.numeric(fraction_bac_abund)) by fraction
## W = 0, p-value = 1.479e-06
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(as.numeric(fraction_bac_abund)))
## # A tibble: 2 x 2
## fraction `mean(as.numeric(fraction_bac_abund))`
## <fctr> <dbl>
## 1 Particle 41168.88
## 2 Free 734522.25
# Make a data frame to draw significance line in boxplot (visually calculated)
dat1 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(6.45,6.5,6.5,6.45)) # WholePart vs WholeFree
poster_a <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(as.numeric(fraction_bac_abund)), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
ylab("Log10(Bacterial Counts) \n (cells/mL)") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
##### Particle vs free cell abundances
geom_path(data = dat1, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=6.5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(frac_abund_wilcox$p.value, digits = 6))) +
theme(legend.position = "bottom",
legend.title = element_blank(),
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
totprod_wilcox <- wilcox.test(frac_bacprod ~ fraction, data = metadata)
totprod_wilcox
##
## Wilcoxon rank sum test
##
## data: frac_bacprod by fraction
## W = 33, p-value = 0.02418
## alternative hypothesis: true location shift is not equal to 0
metadata %>%
group_by(fraction) %>%
summarize(mean(frac_bacprod))
## # A tibble: 2 x 2
## fraction `mean(frac_bacprod)`
## <fctr> <dbl>
## 1 Particle 9.958333
## 2 Free 24.058333
# Make a data frame to draw significance line in boxplot (visually calculated)
dat2 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(71,72,72,71)) # WholePart vs WholeFree
poster_b <- ggplot(metadata, aes(y = frac_bacprod, x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(0, 73)) +
ylab("Community Production \n (μgC/L/hr)") +
##### Particle vs free bulk production
geom_path(data = dat2, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=72, fontface = "bold", size = 3.5, color = "gray40",
label= paste("**\np =", round(totprod_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######################################################### TOTAL PRODUCTION
percellprod_wilcox <- wilcox.test(log10(fracprod_per_cell) ~ fraction, data = filter(metadata, norep_filter_name != "MOTEJ515"))
percellprod_wilcox
##
## Wilcoxon rank sum test
##
## data: log10(fracprod_per_cell) by fraction
## W = 125, p-value = 6.656e-05
## alternative hypothesis: true location shift is not equal to 0
filter(metadata, norep_filter_name != "MOTEJ515") %>%
group_by(fraction) %>%
summarize(mean(fracprod_per_cell))
## # A tibble: 2 x 2
## fraction `mean(fracprod_per_cell)`
## <fctr> <dbl>
## 1 Particle 4.816116e-07
## 2 Free 3.866798e-08
# Make a data frame to draw significance line in boxplot (visually calculated)
dat3 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(-5.05,-5,-5,-5.05)) # WholePart vs WholeFree
poster_c <- ggplot(filter(metadata, norep_filter_name != "MOTEJ515"),
aes(y = log10(fracprod_per_cell), x = fraction)) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes( fill = fraction)) +
scale_fill_manual(values = fraction_colors, guide = FALSE) +
scale_shape_manual(values = season_shapes) +
ylim(c(-8.5, -5)) +
ylab("Log10(Cellular Production) \n(μgC/cell/hr)") +
##### Particle vs free per-cell production
geom_path(data = dat3, aes(x = a, y = b), linetype = 1, color = "gray40") +
annotate("text", x=1.5, y=-5, fontface = "bold", size = 3.5, color = "gray40",
label= paste("***\np =", round(percellprod_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",
axis.title.x = element_blank())
######## FIGURE 1
# legend
legend <- get_legend(poster_a)
row1_plots <- plot_grid(poster_a + theme(legend.position = "none"), poster_b, poster_c,
labels = c("A", "B", "C"),
ncol = 3, nrow = 1)
#fig_1 <-
plot_grid(row1_plots, legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))

Figure S1
work_df <- metadata %>%
dplyr::select(norep_filter_name, fraction, fraction_bac_abund, frac_bacprod, fracprod_per_cell_noinf) %>%
mutate(norep_water_name = paste(substr(norep_filter_name, 1,4), substr(norep_filter_name, 6,9), sep = "")) %>%
dplyr::select(-norep_filter_name)
part_work_df <- work_df %>%
filter(fraction == "Particle") %>%
rename(part_bacabund = fraction_bac_abund,
part_prod = frac_bacprod,
part_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
free_work_df <- work_df %>%
filter(fraction == "Free") %>%
rename(free_bacabund = fraction_bac_abund,
free_prod = frac_bacprod,
free_percell_prod = fracprod_per_cell_noinf) %>%
dplyr::select(-fraction)
byfrac_df <- part_work_df %>%
left_join(free_work_df, by = "norep_water_name")
byfrac_df$season <- substr(byfrac_df$norep_water_name, 5,5) # 7th letter = month sampled
byfrac_df$season <- as.character(byfrac_df$season)
byfrac_df$season <- ifelse(byfrac_df$season == "5", "Spring",
ifelse(byfrac_df$season == "7", "Summer",
ifelse(byfrac_df$season == "9", "Fall",
"NA")))
byfrac_df$season <- factor(byfrac_df$season, levels = c("Spring", "Summer", "Fall"))
summary(lm(log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df, norep_water_name != "MOTE515")))
##
## Call:
## lm(formula = log10(part_bacabund) ~ log10(free_bacabund), data = filter(byfrac_df,
## norep_water_name != "MOTE515"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.52690 -0.08048 0.05903 0.19565 0.35255
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0900 3.2247 0.648 0.533
## log10(free_bacabund) 0.4264 0.5532 0.771 0.461
##
## Residual standard error: 0.3111 on 9 degrees of freedom
## Multiple R-squared: 0.06194, Adjusted R-squared: -0.04229
## F-statistic: 0.5943 on 1 and 9 DF, p-value: 0.4605
plot1 <- ggplot(filter(byfrac_df, norep_water_name != "MOTE515"),
aes(x = log10(free_bacabund), y = log10(part_bacabund))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Bacterial Counts) \n (cells/mL)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
theme(legend.position = "bottom",
legend.title = element_blank())
lm_prod_corr <- lm(part_prod ~ free_prod, data = byfrac_df)
plot2 <- ggplot(byfrac_df, aes(x = free_prod, y = part_prod)) +
xlab("Free") + ylab("Particle") +
ggtitle("Community Production \n(μgC/L/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", x =20, y= 30, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_prod_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
lm_percell_corr <- lm(log10(part_percell_prod) ~ log10(free_percell_prod), data = byfrac_df)
plot3 <- ggplot(byfrac_df,
aes(x = log10(free_percell_prod), y = log10(part_percell_prod))) +
xlab("Free") + ylab("Particle") +
ggtitle("Log10(Cellular Production) \n (μgC/cell/hr)") +
geom_point(size = 3, fill = "grey", aes(shape = season), width = 0.2) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color = "black") +
annotate("text", y = -5.8, x=-7.9, color = "black", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_corr)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_corr)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none")
legend_s1 <- get_legend(plot1)
top_row_S1 <- plot_grid(plot1 +theme(legend.position = "none"), plot2, plot3,
nrow = 1, ncol = 3,
labels = c("A", "B", "C"),
align = "h")
plot_grid(top_row_S1, legend_s1,
rel_heights = c(1, 0.05),
nrow = 2, ncol = 1)

Linear Regressions with Fraction Diversity
#################################### Bulk Measure Production ####################################
################### Richness ###################
summary(lm(frac_bacprod ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.057 -12.017 -5.654 9.256 46.605
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.501168 9.039579 2.047 0.0528 .
## mean -0.003335 0.018911 -0.176 0.8616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.54 on 22 degrees of freedom
## Multiple R-squared: 0.001412, Adjusted R-squared: -0.04398
## F-statistic: 0.0311 on 1 and 22 DF, p-value: 0.8616
# Particle-Associated
lm_prod_vs_rich_PA <- lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_prod_vs_rich_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1271 -2.8865 -0.4649 3.7537 9.8401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.269531 5.717450 -1.971 0.07701 .
## mean 0.038125 0.009868 3.863 0.00314 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.476 on 10 degrees of freedom
## Multiple R-squared: 0.5988, Adjusted R-squared: 0.5587
## F-statistic: 14.93 on 1 and 10 DF, p-value: 0.003143
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_rich_PA <- train(
frac_bacprod ~ mean, data = filter(richness, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.658983 0.6537518 2.255394 0.2948377
summary(lm(frac_bacprod ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(richness, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.094 -11.112 -2.755 8.611 33.308
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.40793 21.62006 0.250 0.808
## mean 0.05511 0.06207 0.888 0.395
##
## Residual standard error: 17.7 on 10 degrees of freedom
## Multiple R-squared: 0.07306, Adjusted R-squared: -0.01963
## F-statistic: 0.7882 on 1 and 10 DF, p-value: 0.3955
################### Shannon Entropy ###################
summary(lm(frac_bacprod ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.659 -11.878 -5.666 9.231 46.593
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.63373 26.71652 0.623 0.540
## mean 0.08797 6.22969 0.014 0.989
##
## Residual standard error: 15.55 on 22 degrees of freedom
## Multiple R-squared: 9.064e-06, Adjusted R-squared: -0.04545
## F-statistic: 0.0001994 on 1 and 22 DF, p-value: 0.9889
# Particle-Associated
lm_prod_vs_shannon_PA <- lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_prod_vs_shannon_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9022 -2.9150 -0.5875 1.6713 12.0544
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -40.320 14.007 -2.878 0.01643 *
## mean 11.089 3.068 3.614 0.00473 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.693 on 10 degrees of freedom
## Multiple R-squared: 0.5664, Adjusted R-squared: 0.5231
## F-statistic: 13.06 on 1 and 10 DF, p-value: 0.004734
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_shannon_PA <- train(
frac_bacprod ~ mean, data = filter(shannon, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.643704 0.7014407 2.470687 0.2662234
summary(lm(frac_bacprod ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(shannon, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.004 -10.708 -3.738 6.632 37.129
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -13.37 73.87 -0.181 0.860
## mean 9.40 18.50 0.508 0.622
##
## Residual standard error: 18.15 on 10 degrees of freedom
## Multiple R-squared: 0.02516, Adjusted R-squared: -0.07233
## F-statistic: 0.2581 on 1 and 10 DF, p-value: 0.6225
################### Inverse Simpson ###################
summary(lm(frac_bacprod ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.269 -10.298 -4.916 5.866 46.452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.1577 6.0225 2.019 0.0559 .
## mean 0.1629 0.1731 0.941 0.3570
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.25 on 22 degrees of freedom
## Multiple R-squared: 0.03868, Adjusted R-squared: -0.005019
## F-statistic: 0.8851 on 1 and 22 DF, p-value: 0.357
# Particle-Associated samples
lm_prod_vs_invsimps_PA <- lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.5589 -2.1093 -0.1969 0.8752 7.8282
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.45199 2.43866 -0.185 0.85666
## mean 0.29344 0.05781 5.076 0.00048 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.571 on 10 degrees of freedom
## Multiple R-squared: 0.7204, Adjusted R-squared: 0.6925
## F-statistic: 25.77 on 1 and 10 DF, p-value: 0.0004804
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_invsimps_PA <- train(
frac_bacprod ~ mean, data = filter(invsimps, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_invsimps_PA$results # Cross Validated PA results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 5.27195 0.7648654 2.096906 0.2523474
#Free Living Samples
summary(lm(frac_bacprod ~ mean, data = filter(invsimps, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(invsimps, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.765 -9.356 -4.445 6.116 36.017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.0985 16.7052 0.664 0.521
## mean 0.5379 0.6598 0.815 0.434
##
## Residual standard error: 17.8 on 10 degrees of freedom
## Multiple R-squared: 0.06233, Adjusted R-squared: -0.03143
## F-statistic: 0.6648 on 1 and 10 DF, p-value: 0.4339
################### Simpson's Evenness ###################
summary(lm(frac_bacprod ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.000 -7.163 -3.815 3.392 45.845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.8524 9.2286 -0.092 0.9272
## mean 274.1606 134.4253 2.040 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.26 on 22 degrees of freedom
## Multiple R-squared: 0.159, Adjusted R-squared: 0.1208
## F-statistic: 4.16 on 1 and 22 DF, p-value: 0.05359
# Particle-Associated
lm_prod_vs_simpseven_PA <- lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.973 -2.229 -1.086 1.356 12.380
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.829 4.539 -0.844 0.41865
## mean 234.057 71.238 3.286 0.00821 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.995 on 10 degrees of freedom
## Multiple R-squared: 0.5191, Adjusted R-squared: 0.471
## F-statistic: 10.79 on 1 and 10 DF, p-value: 0.008211
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_prod_vs_simpseven_PA <- train(
frac_bacprod ~ mean, data = filter(simpseven, fraction == "Particle"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 6.377862 0.6423519 2.906471 0.2954007
summary(lm(frac_bacprod ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = frac_bacprod ~ mean, data = filter(simpseven, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.389 -12.029 -3.306 4.668 39.946
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.85 23.52 0.674 0.516
## mean 114.96 321.02 0.358 0.728
##
## Residual standard error: 18.27 on 10 degrees of freedom
## Multiple R-squared: 0.01266, Adjusted R-squared: -0.08607
## F-statistic: 0.1282 on 1 and 10 DF, p-value: 0.7277
#################################### Per-Cell Production ####################################
################### Richness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = richness)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8668 -0.2124 0.1045 0.2133 0.6473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.4591277 0.2212633 -38.231 < 2e-16 ***
## mean 0.0028988 0.0004641 6.246 3.4e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3804 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6501, Adjusted R-squared: 0.6334
## F-statistic: 39.02 on 1 and 21 DF, p-value: 3.395e-06
# Particle-Associated
lm_percell_prod_vs_rich_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle"))
summary(lm_percell_prod_vs_rich_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55347 -0.21545 -0.01066 0.12536 0.58830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.0617648 0.3717671 -21.685 4.44e-09 ***
## mean 0.0023794 0.0006348 3.748 0.00457 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3506 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.6095, Adjusted R-squared: 0.5662
## F-statistic: 14.05 on 1 and 9 DF, p-value: 0.004567
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_rich_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_rich_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4164107 0.6183409 0.150773 0.3257465
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(richness,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.71719 -0.13833 0.07155 0.23581 0.56949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.135758 0.471253 -17.264 8.99e-09 ***
## mean 0.001657 0.001353 1.225 0.249
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3859 on 10 degrees of freedom
## Multiple R-squared: 0.1304, Adjusted R-squared: 0.04344
## F-statistic: 1.499 on 1 and 10 DF, p-value: 0.2488
################### Shannon Entropy ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = shannon)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.03514 -0.15042 -0.03394 0.26568 0.82794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -10.9036 0.7534 -14.472 2.14e-12 ***
## mean 0.8805 0.1763 4.993 6.09e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4348 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5428, Adjusted R-squared: 0.521
## F-statistic: 24.93 on 1 and 21 DF, p-value: 6.09e-05
# Particle-Associated
lm_percell_prod_vs_shannon_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle"))
summary(lm_percell_prod_vs_shannon_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.36686 -0.23571 -0.01330 0.03961 0.70312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.8897 0.8820 -11.213 1.37e-06 ***
## mean 0.6993 0.1935 3.614 0.00562 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3584 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5921, Adjusted R-squared: 0.5468
## F-statistic: 13.06 on 1 and 9 DF, p-value: 0.00562
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_shannon_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_shannon_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4239945 0.6778371 0.1361161 0.3173377
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(shannon,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72444 -0.17785 0.08114 0.13946 0.67366
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.8666 1.6332 -5.429 0.000289 ***
## mean 0.3243 0.4091 0.793 0.446298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4014 on 10 degrees of freedom
## Multiple R-squared: 0.05914, Adjusted R-squared: -0.03495
## F-statistic: 0.6285 on 1 and 10 DF, p-value: 0.4463
################### Inverse Simpson ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = invsimps)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97302 -0.28199 -0.05285 0.32003 0.99088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.86039 0.18153 -43.301 < 2e-16 ***
## mean 0.02355 0.00525 4.485 0.000204 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4596 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4893, Adjusted R-squared: 0.4649
## F-statistic: 20.12 on 1 and 21 DF, p-value: 0.0002037
# Particle-Associated
lm_percell_prod_vs_invsimps_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle"))
summary(lm_percell_prod_vs_invsimps_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28651 -0.18384 -0.11125 0.07337 0.56444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.360961 0.159490 -46.153 5.27e-12 ***
## mean 0.018087 0.003759 4.812 0.000958 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2969 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.7201, Adjusted R-squared: 0.689
## F-statistic: 23.15 on 1 and 9 DF, p-value: 0.0009581
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_invsimps_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_invsimps_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.350356 0.7200567 0.1138028 0.3340058
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(invsimps,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68269 -0.11512 0.01325 0.17742 0.63175
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.03513 0.35685 -22.517 6.72e-10 ***
## mean 0.01910 0.01409 1.355 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3803 on 10 degrees of freedom
## Multiple R-squared: 0.1551, Adjusted R-squared: 0.07064
## F-statistic: 1.836 on 1 and 10 DF, p-value: 0.2052
################### Simpson's Evenness ###################
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)) # All samples together
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = simpseven)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.06874 -0.41920 0.04553 0.34511 1.56565
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.4496 0.4116 -18.10 2.74e-14 ***
## mean 4.3470 6.0344 0.72 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6353 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.02411, Adjusted R-squared: -0.02236
## F-statistic: 0.5189 on 1 and 21 DF, p-value: 0.4792
# Particle-Associated
lm_percell_prod_vs_simpseven_PA <- lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle"))
summary(lm_percell_prod_vs_simpseven_PA)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4726 -0.2230 -0.1106 0.1248 0.6887
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5941 0.2885 -26.324 7.96e-10 ***
## mean 15.1887 4.6341 3.278 0.00957 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3788 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.5441, Adjusted R-squared: 0.4935
## F-statistic: 10.74 on 1 and 9 DF, p-value: 0.009566
# Cross validate particle-associated for a better estimate of the adjusted R-squared
cv_lm_percell_prod_vs_simpseven_PA <- train(
log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Particle" & norep_filter_name != "MOTEJ515"),
method ='lm',
trControl = trainControl(method ="repeatedcv", number = 3, repeats = 100),
tuneGrid = expand.grid(intercept = TRUE))
cv_lm_percell_prod_vs_simpseven_PA$results # Particle-Associated CV results
## intercept RMSE Rsquared RMSESD RsquaredSD
## 1 TRUE 0.4354828 0.6660292 0.1432305 0.316585
summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven, fraction == "Free"))) # Free Living
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mean, data = filter(simpseven,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63112 -0.24216 0.01388 0.14672 0.77426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9274 0.5202 -15.240 3e-08 ***
## mean 4.9361 7.1008 0.695 0.503
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4041 on 10 degrees of freedom
## Multiple R-squared: 0.04609, Adjusted R-squared: -0.0493
## F-statistic: 0.4832 on 1 and 10 DF, p-value: 0.5028
R2 table
########## PUT A TABLE TOGETHER
# Per Liter Production
perliter_row1 <- c("Richness", "Per-Liter",
round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
perliter_row2 <- c("Shannon_Entropy", "Per-Liter",
round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
perliter_row3 <- c("Inverse_Simpson", "Per-Liter",
round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
perliter_row4 <- c("Simpsons_Evenness","Per-Liter",
round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
# Per cell production
percell_row1 <- c("Richness", "Per-Cell",
round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_rich_PA$results$RsquaredSD, digits = 3))
percell_row2 <- c("Shannon_Entropy", "Per-Cell",
round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_shannon_PA$results$RsquaredSD, digits = 3))
percell_row3 <- c("Inverse_Simpson", "Per-Cell",
round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_invsimps_PA$results$RsquaredSD, digits = 3))
percell_row4 <- c("Simpsons_Evenness", "Per-Cell",
round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$Rsquared, digits = 3),
round(cv_lm_percell_prod_vs_simpseven_PA$results$RsquaredSD, digits = 3))
r2_table <- as.data.frame(rbind(perliter_row1, perliter_row2, perliter_row3, perliter_row4,
percell_row1, percell_row2, percell_row3, percell_row4))
colnames(r2_table) <- c("Diversity_Metric", "Production_Measure","Adjusted_R2","CV_R2", "CV_R2_SD")
row.names(r2_table) = NULL
pander(r2_table,
caption = "Supplemental Table 2: \n R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.")
Supplemental Table 2: R2 estimates for heterotrophic production vs particle-associated diversity linear regressions.
| Richness |
Per-Liter |
0.559 |
0.654 |
0.295 |
| Shannon_Entropy |
Per-Liter |
0.523 |
0.701 |
0.266 |
| Inverse_Simpson |
Per-Liter |
0.692 |
0.765 |
0.252 |
| Simpsons_Evenness |
Per-Liter |
0.471 |
0.642 |
0.295 |
| Richness |
Per-Cell |
0.566 |
0.618 |
0.326 |
| Shannon_Entropy |
Per-Cell |
0.547 |
0.678 |
0.317 |
| Inverse_Simpson |
Per-Cell |
0.689 |
0.72 |
0.334 |
| Simpsons_Evenness |
Per-Cell |
0.493 |
0.666 |
0.317 |
Prepare Figure 2
######################################################### OBSERVED RICHNESS
rich_fraction_wilcox <- wilcox.test(mean ~ fraction, data = richness)
rich_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 129, p-value = 0.0004955
## alternative hypothesis: true location shift is not equal to 0
filter(richness) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 556.7992 167.31404
## 2 Free 338.4242 85.98665
# Make a data frame to draw significance line in boxplot (visually calculated)
rich_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(920, 930, 930, 920)) # WholePart vs WholeFree
rich_distribution_plot <-
ggplot(richness, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
xlab("Observed Richness") + xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
geom_path(data = rich_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=790, fontface = "bold", size = 4, color = "#424645",
label= paste("p =", round(rich_fraction_wilcox$p.value, digits = 5))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Richness vs Per-Liter Heterotrophic Production Plot
prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
annotate("text", x = 790, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_rich_plot <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Observed Richness") +
geom_smooth(data=filter(richness, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 790, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_rich_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_rich_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
######################################################### INVERSE SIMPSON
invsimps_fraction_wilcox <- wilcox.test(mean ~ fraction, data = invsimps)
invsimps_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 81, p-value = 0.6297
## alternative hypothesis: true location shift is not equal to 0
filter(invsimps) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 35.47659 23.843325
## 2 Free 24.09219 8.136901
# Make a data frame to draw significance line in boxplot (visually calculated)
invsimps_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(83,85,85,83)) # WholePart vs WholeFree
invsimps_distribution_plot <-
ggplot(invsimps, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
ylab("Inverse Simpson") + xlab("Fraction") +
geom_path(data = invsimps_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=80, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# INVERSE SIMPSON
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
annotate("text", x = 70, y=45, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Inverse Simpson vs per-cell production Plot
percell_prod_vs_invsimps_plot <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Inverse Simpson") +
geom_smooth(data=filter(invsimps, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 70, y=-7.5, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_invsimps_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_invsimps_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
# Plot Inverse simpson plots altogether
invsimps_plots <- plot_grid(invsimps_distribution_plot, prod_vs_invsimps_plot, percell_prod_vs_invsimps_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
Prepare Figure S2
######################################################### SHANNON_ENTROPY
shannon_fraction_wilcox <- wilcox.test(mean ~ fraction, data = shannon)
shannon_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 119, p-value = 0.00556
## alternative hypothesis: true location shift is not equal to 0
filter(shannon) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 4.534078 0.5594638
## 2 Free 3.982314 0.2958221
# Make a data frame to draw significance line in boxplot (visually calculated)
shannon_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(5.8, 5.9, 5.9, 5.8)) # WholePart vs WholeFree
shannon_distribution_plot <-
ggplot(shannon, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(shape = season, fill = fraction), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
xlab("Fraction") +
scale_shape_manual(values = season_shapes) +
# Add line and pval
geom_path(data = shannon_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=5.6, fontface = "bold", size = 4, color = "#424645",
label= paste("*p =", round(shannon_fraction_wilcox$p.value, digits = 3))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Shannon vs Per-Liter Heterotrophic Production Plot
prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
annotate("text", x = 5.25, y=45, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Richness vs Per-cell herterotrophic production Plot
percell_prod_vs_shannon_plot <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Shannon Entropy") +
geom_smooth(data=filter(shannon, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
#scale_y_continuous(limits = c(-8e-7,5e-6), breaks = seq(from = 0, to = 6e-7, by = 3e-7)) +
annotate("text", x = 5.25, y=-7.5, color = "#FF6600", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_shannon_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3))) +
#annotate("text", x = 250, y=55, color = "skyblue", fontface = "bold", label = paste("Free = NS")) +
theme(legend.title = element_blank(), legend.position = "none")
shannon_plots <- plot_grid(shannon_distribution_plot, prod_vs_shannon_plot, percell_prod_vs_shannon_plot,
labels = c("A", "C", "E"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
######################################################### INVERSE SIMPSON
simpseven_fraction_wilcox <- wilcox.test(mean ~ fraction, data = simpseven)
simpseven_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mean by fraction
## W = 55, p-value = 0.3474
## alternative hypothesis: true location shift is not equal to 0
filter(simpseven) %>%
group_by(fraction) %>%
summarize(mean(mean), sd(mean))
## # A tibble: 2 x 3
## fraction `mean(mean)` `sd(mean)`
## <fctr> <dbl> <dbl>
## 1 Particle 0.05890559 0.02537484
## 2 Free 0.07138806 0.01716014
# Make a data frame to draw significance line in boxplot (visually calculated)
simpseven_nums <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(0.14, 0.15, 0.15, 0.14)) # WholePart vs WholeFree
simpseven_distribution_plot <-
ggplot(simpseven, aes(y = mean, x = fraction)) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_y_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
ylab("Simpson's Evenness") +
scale_shape_manual(values = season_shapes) +
xlab("Fraction") +
geom_path(data = simpseven_nums, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=0.14, fontface = "bold", size = 4, color = "#424645", label= "NS") +
theme(legend.position = "none",
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# SIMPSONS EVENNESS
# Plot Inverse Simpson vs Per-Liter Heterotrophic Production
prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=frac_bacprod)) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction)) + # Y-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
scale_fill_manual(values = fraction_colors) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
ylab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=15, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 3))) +
theme(legend.position = "none", axis.title.x = element_blank(), axis.text.x = element_blank())
# Plot
percell_prod_vs_simpseven_plot <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, color = "black", aes(fill = fraction, shape = season)) +
scale_color_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
scale_fill_manual(values = fraction_colors) +
ylab("log10(production/cell)\n (μgC/cell/hr)") +
xlab("Simpson's Evenness") +
geom_smooth(data=filter(simpseven, fraction == "Particle"), method='lm', color = "#FF6600", fill = "#FF6600") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
annotate("text", x = 0.14, y=-6.3, color = "#FF6600", fontface = "bold",size = 4,
label = paste("R2 =", round(summary(lm_percell_prod_vs_simpseven_PA)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm_percell_prod_vs_simpseven_PA)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="none")
simpseven_plots <- plot_grid(simpseven_distribution_plot, prod_vs_simpseven_plot, percell_prod_vs_simpseven_plot,
labels = c("B", "D", "F"), ncol = 1, nrow = 3,
rel_heights = c(0.5, 0.8, 1.1),
align = "v")
R2_val <- grobTree(textGrob(bquote(Adj~R^2 == .(paste(round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2)))),
x=0.5, y=0.5, gp=gpar(col="#FF6600", size=12, fontface="bold")))
## Error in grobTree(textGrob(bquote(Adj ~ R^2 == .(paste(round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, : could not find function "grobTree"
R2 <- grobTree(textGrob(bquote(Adj~R^2~ . (paste("=", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, digits = 2)))),
#paste("p =", round(unname(summary(lm_prod_vs_shannon_PA)$coefficients[,4][2]), digits = 3)),
x=0.5, y=0.75, gp=gpar(col="#FF6600", size=12, fontface="bold")))
## Error in grobTree(textGrob(bquote(Adj ~ R^2 ~ .(paste("=", round(summary(lm_prod_vs_shannon_PA)$adj.r.squared, : could not find function "grobTree"
prod_vs_shannon_plot +
xlab("Shannon Entropy") + annotation_custom(R2)
## Error: object of type 'closure' is not subsettable

Prepare Figure S3
plot_all_rich_percell <-
ggplot(richness, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Observed Richness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(150,950), breaks = seq(from = 0, to =925, by = 150)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 790, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = richness))$coefficients[,4][2]), digits = 6))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_shannon_percell <-
ggplot(shannon, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Shannon Entropy") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(3.5,6), breaks = seq(from = 3, to =6, by = 0.5)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 5.25, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = shannon))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_invsimps_percell <-
ggplot(invsimps, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Inverse Simpson") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,85), breaks = seq(from = 0, to = 85, by = 20)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 70, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = invsimps))$coefficients[,4][2]), digits = 5))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
plot_all_simpseven_percell <-
ggplot(simpseven, aes(x=mean, y=log10(fracprod_per_cell_noinf))) +
geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), color = "grey", alpha = 0.7) + # X-axis errorbars
geom_point(size = 3.5, aes(shape = season), color = "black", fill = "grey") +
scale_shape_manual(values = season_shapes) +
ylab("log10(production/cell)\n (μgC/cell/hr)") + xlab("Simpson's Evenness") +
geom_smooth(method='lm', color = "#424645", fill = "#424645") +
scale_x_continuous(limits = c(0,0.15), breaks = seq(from = 0, to = 0.15, by = 0.03)) +
scale_y_continuous(limits = c(-8.5, -5.4), breaks = seq(from = -9, to = -5, by = 1)) +
annotate("text", x = 0.12, y=-8, color = "#424645", fontface = "bold", size = 4,
label = paste("R2 =", round(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fracprod_per_cell_noinf) ~ mean, data = simpseven))$coefficients[,4][2]), digits = 2))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
figs3_row1 <- plot_grid(plot_all_rich_percell + theme(legend.position = "none"),
plot_all_shannon_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
plot_all_invsimps_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
plot_all_simpseven_percell + theme(axis.title.y = element_blank(), legend.position = "none"),
align = "h", labels = c("A", "B", "C", "D"),
rel_widths = c(1.05, 0.9, 0.9, 0.9),
nrow = 1, ncol = 4)
######## FIGURE S3
# legend
legend_grey <- get_legend(plot_all_rich_percell)
plot_grid(figs3_row1, legend_grey,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))

Perform Ridge and Lasso regression
Bulk Community Production: PARTICLE
# Set the seed for reproducibility of the grid values
set.seed(777)
################ PREPARE DATA ################
# Subset data needed and scale all o
scaled_comm_data <-
lasso_data_df_particle_noprod %>% # Use data only for the particle samples
scale() %>% # Scale all of the variables to have mean =0 and sd = 1
as.data.frame() # Make it a dataframe so that model.matrix function works (does not take a matrix)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(frac_bacprod ~ ., scaled_comm_data)[,-1]
y = scaled_comm_data$frac_bacprod
grid = 10^seq(10,-2,length = 100)
################ PREPARE TRAINING & TESTING DATA FOR CROSS VALIDATION ################
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE ################
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = TRUE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
plot(cv_ridge_divs)

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.124927
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO ################
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
plot(cv_lasso_divs)

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 1.405385
## Run lasso on the entire dataset with the best lambda value
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients? (Anything with a . is not selected by the model)
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -1.499450e-17
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund .
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 .
## PC6 .
## Richness .
## Shannon_Entropy .
## Inverse_Simpson 1.609355e-01
## Simpsons_Evenness .
## Unweighted_PD .
## Weighted_PD .
The test MSE for ridge regression is 1.124927 while the test MSE for lasso is 1.4053855.
Additionally, the lasso model uses Inverse Simpson as the best and only predictor of production!
Per capita Production: PARTICLE
scaled_percapita_data <- percell_lasso_data_df_particle_noprod %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame()
set.seed(777)
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(log10_percell ~ ., scaled_percapita_data)[,-1]
y = scaled_percapita_data$log10_percell
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = FALSE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
plot(cv_ridge_divs)

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.658946
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = FALSE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = FALSE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
plot(cv_lasso_divs)

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 1.780649
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = FALSE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) -7.288386e-16
## Sample_depth_m .
## Temp_C -1.356874e-01
## SpCond_uSpercm .
## TDS_mgperL .
## pH .
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL .
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund -9.147228e-04
## attached_bac -2.317379e-02
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund .
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 -2.691873e-02
## PC6 .
## Richness .
## Shannon_Entropy .
## Inverse_Simpson 4.161940e-01
## Simpsons_Evenness .
## Unweighted_PD .
## Weighted_PD .
Per-capita Production: ALL SAMPLES
set.seed(777)
scaled_percapita_ALL <- all_dat_lasso_percapita %>%
dplyr::filter(!is.na(fracprod_per_cell_noinf)) %>%
mutate(log10_percell = log10(fracprod_per_cell_noinf)) %>%
dplyr::select(-c(fracprod_per_cell_noinf)) %>%
scale() %>%
as.data.frame()
# Set model parameters for community level data
## NOTE: there cannot be any data with NA
x = model.matrix(log10_percell ~ ., scaled_percapita_ALL)[,-1]
y = scaled_percapita_ALL$log10_percell
grid = 10^seq(10,-2,length = 100)
# Pull out test and training sets for cross validation
# We will use half the set to train the model and the 2nd half of the dataset to test the model
train <- sample(1:nrow(x), nrow(x)/2)
test <- -train
y_test <- y[test]
################ RIDGE
# Run RIDGE regression with alpha = 0
ridge_divs_train <- glmnet(x[train,], y[train], alpha = 0, lambda = grid, thresh = 1e-12, standardize = TRUE)
par(mfrow = c(1,2))
plot(ridge_divs_train)
# Cross validation
cv_ridge_divs <- cv.glmnet(x[train,], y[train], alpha = 0)
plot(cv_ridge_divs)

best_ridge_lambda <- cv_ridge_divs$lambda.min
ridge_divs_pred <- predict(ridge_divs_train, s = best_ridge_lambda, newx = x[test,])
mean((ridge_divs_pred - y_test)^2) # Test MSE
## [1] 1.034983
## Run ridge on the entire dataset
ridge_divs <- glmnet(x, y, alpha = 0, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(ridge_divs)

################ LASSO
# Run lasso regression with alpha = 1
lasso_divs_train <- glmnet(x[train,], y[train], alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,2))
plot(lasso_divs_train)
# Cross validation
cv_lasso_divs <- cv.glmnet(x[train,], y[train], alpha = 1)
plot(cv_lasso_divs)

best_lasso_lambda <- cv_lasso_divs$lambda.min
lasso_divs_pred <- predict(lasso_divs_train, s = best_lasso_lambda, newx = x[test,])
mean((lasso_divs_pred - y_test)^2)
## [1] 0.7951617
## Run lasso on the entire dataset
lasso_divs <- glmnet(x, y, alpha = 1, lambda = grid, standardize = TRUE)
par(mfrow = c(1,1))
plot(lasso_divs)

plot(lasso_divs, xvar = "lambda", label = TRUE)

plot(lasso_divs, xvar = "dev", label = TRUE)

# What are the lasso coefficients?
predict(lasso_divs, type = "coefficients", s = best_lasso_lambda)
## 35 x 1 sparse Matrix of class "dgCMatrix"
## 1
## (Intercept) 8.490670e-16
## Sample_depth_m .
## Temp_C .
## SpCond_uSpercm .
## TDS_mgperL .
## pH -1.566237e-01
## ORP_mV .
## Chl_Lab_ugperL .
## Cl_mgperL .
## SO4_mgperL -2.666180e-03
## NO3_mgperL .
## NH3_mgperL .
## TKN_mgperL .
## SRP_ugperL .
## TP_ugperL .
## Alk_mgperL .
## DO_mgperL .
## DO_percent .
## total_bac_abund .
## attached_bac .
## perc_attached_abund .
## perc_attached_bacprod .
## fraction_bac_abund -1.327209e-01
## PC1 .
## PC2 .
## PC3 .
## PC4 .
## PC5 .
## PC6 .
## Richness 3.769169e-01
## Shannon_Entropy .
## Inverse_Simpson .
## Simpsons_Evenness .
## Unweighted_PD -7.126563e-02
## Weighted_PD .
Figure 3: Unweighted ses.mpd
######################################################### SES MPD DISTRIBUTION: UNWEIGHTED
unweighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = unweighted_df)
unweighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 30, p-value = 0.01449
## alternative hypothesis: true location shift is not equal to 0
unweighted_df %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.4842762
## 2 Free 0.4280979
# Make a data frame to draw significance line in boxplot (visually calculated)
dat4 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
unweight_distribution_plot <-
ggplot(unweighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat4, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.35, fontface = "bold", size = 4, color = "#424645",
label= paste("p =", round(unweighted_fraction_wilcox$p.value, digits = 2))) +
theme(legend.position = "none",# axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between richness and Unweighted Mean Pairwise distance?
summary(lm(mean ~ mpd.obs.z, data = unweighted_df))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -194.03 -100.85 -10.60 84.21 294.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 444.12 28.28 15.703 1.95e-13 ***
## mpd.obs.z -124.30 34.19 -3.636 0.00146 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 138.5 on 22 degrees of freedom
## Multiple R-squared: 0.3754, Adjusted R-squared: 0.347
## F-statistic: 13.22 on 1 and 22 DF, p-value: 0.001459
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -189.00 -106.35 -44.10 58.05 274.78
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 515.62 50.25 10.261 1.25e-06 ***
## mpd.obs.z -85.02 48.76 -1.744 0.112
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 153.7 on 10 degrees of freedom
## Multiple R-squared: 0.2332, Adjusted R-squared: 0.1565
## F-statistic: 3.041 on 1 and 10 DF, p-value: 0.1118
summary(lm(mean ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(unweighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.20 -66.23 -17.30 43.28 172.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 338.3215 41.0307 8.246 9.03e-06 ***
## mpd.obs.z 0.2398 74.0807 0.003 0.997
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 90.18 on 10 degrees of freedom
## Multiple R-squared: 1.048e-06, Adjusted R-squared: -0.1
## F-statistic: 1.048e-05 on 1 and 10 DF, p-value: 0.9975
unweight_rich_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = mean, x = mpd.obs.z)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(fill = fraction, shape = season)) + ylab("Observed Richness") +
scale_shape_manual(values = season_shapes) +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=750, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(mean ~ mpd.obs.z, data = unweighted_df))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between Production and Unweighted Mean Pairwise distance?
#summary(lm(frac_bacprod ~ mpd.obs.z, data = unweighted_df)) # NS
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.230 -4.036 -1.734 3.173 19.457
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.247 2.582 3.194 0.00959 **
## mpd.obs.z -3.533 2.505 -1.410 0.18882
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.896 on 10 degrees of freedom
## Multiple R-squared: 0.1659, Adjusted R-squared: 0.08246
## F-statistic: 1.989 on 1 and 10 DF, p-value: 0.1888
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.508 -14.439 1.956 8.581 37.337
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.989 8.104 2.343 0.0411 *
## mpd.obs.z 11.841 14.632 0.809 0.4372
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.81 on 10 degrees of freedom
## Multiple R-squared: 0.06146, Adjusted R-squared: -0.0324
## F-statistic: 0.6548 on 1 and 10 DF, p-value: 0.4372
unweight_prod_vs_mpd_plot <-
ggplot(unweighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(shape = season)) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and Unweighted Mean Pairwise distance?
unweight_vs_percell <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
summary(unweight_vs_percell)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = unweighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69739 -0.28558 0.00112 0.11739 1.31840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.19827 0.09996 -72.011 < 2e-16 ***
## mpd.obs.z -0.49468 0.12004 -4.121 0.000487 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4782 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.4471, Adjusted R-squared: 0.4208
## F-statistic: 16.98 on 1 and 21 DF, p-value: 0.0004866
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.38323 -0.23870 -0.19095 0.05971 1.17646
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.9197 0.1684 -41.090 1.49e-11 ***
## mpd.obs.z -0.3279 0.1595 -2.056 0.0699 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4628 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.3196, Adjusted R-squared: 0.244
## F-statistic: 4.228 on 1 and 9 DF, p-value: 0.0699
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df, fraction == "Free")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(unweighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62822 -0.30045 0.09582 0.16090 0.77191
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.54050 0.18773 -40.167 2.19e-12 ***
## mpd.obs.z -0.08069 0.33894 -0.238 0.817
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4126 on 10 degrees of freedom
## Multiple R-squared: 0.005636, Adjusted R-squared: -0.0938
## F-statistic: 0.05668 on 1 and 10 DF, p-value: 0.8166
unweight_percell_vs_mpd_plot <-
ggplot(unweighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, aes(fill = fraction, shape = season)) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Unweighted Phylogenetic Diversity") +
scale_fill_manual(values = fraction_colors) +
scale_shape_manual(values = season_shapes) +
geom_smooth(method = "lm", color="#424645", fill = "#424645", alpha = 0.3) +
#stat_smooth(method="lm", se=TRUE, formula= y ~ poly(x, 2, raw=TRUE), color="#424645", fill = "#424645", alpha = 0.3) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=-6, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(unweight_vs_percell)$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(unweight_vs_percell)$coefficients[,4][2]), digits = 4))) +
theme(legend.title = element_blank(), legend.position ="bottom",
legend.text = element_text(size = 14))
######## FIGURE 3
figure3_row1 <- plot_grid(unweight_distribution_plot, unweight_rich_vs_mpd_plot, unweight_prod_vs_mpd_plot,
unweight_percell_vs_mpd_plot + theme(legend.position = "none"),
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 1, 1, 1.2),
align = "v")
plot_grid(figure3_row1, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))

Figure S6: Weighted ses.mpd
######################################################### SES MPD DISTRIBUTION: WEIGHTED
weighted_fraction_wilcox <- wilcox.test(mpd.obs.z ~ fraction, data = weighted_df)
weighted_fraction_wilcox
##
## Wilcoxon rank sum test
##
## data: mpd.obs.z by fraction
## W = 80, p-value = 0.6707
## alternative hypothesis: true location shift is not equal to 0
filter(weighted_df) %>%
group_by(fraction) %>%
summarize(mean(mpd.obs.z))
## # A tibble: 2 x 2
## fraction `mean(mpd.obs.z)`
## <fctr> <dbl>
## 1 Particle -0.3484573
## 2 Free -0.3877349
# Make a data frame to draw significance line in boxplot (visually calculated)
dat5 <- data.frame(a = c(1.15,1.15,1.85,1.85), b = c(1.6,1.7,1.7,1.6)) # WholePart vs WholeFree
weight_distribution_plot <-
ggplot(weighted_df, aes(y = mpd.obs.z, x = fraction)) +
#scale_color_manual(values = fraction_colors) +
scale_fill_manual(values = fraction_colors) +
geom_jitter(size = 3.5, aes(fill = fraction, shape = season), width = 0.2) +
geom_boxplot(alpha = 0.5, outlier.shape = NA, aes(fill = fraction)) +
scale_shape_manual(values = season_shapes) +
scale_y_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
xlab("Fraction") +
geom_hline(yintercept = 0, linetype = "dashed", size = 1.5) +
##### Particle vs free per-cell production
geom_path(data = dat5, aes(x = a, y = b), linetype = 1, color = "#424645") +
annotate("text", x=1.5, y=1.55, fontface = "bold", size = 3.5, color = "#424645", label= "NS") +
theme(legend.position = "none", #axis.title.y = element_blank(),
axis.title.x = element_blank(), axis.text.x = element_blank()) +
coord_flip()
# Is there a relationship between inverse simpson and Weighted Mean Pairwise distance?
#summary(lm(mean ~ mpd.obs.z, data = weighted_df)) # NS
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.184 -20.664 0.742 13.835 39.244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 23.32 12.24 1.904 0.086 .
## mpd.obs.z -34.90 29.31 -1.191 0.261
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.4 on 10 degrees of freedom
## Multiple R-squared: 0.1242, Adjusted R-squared: 0.03661
## F-statistic: 1.418 on 1 and 10 DF, p-value: 0.2612
summary(lm(mean ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = mean ~ mpd.obs.z, data = filter(weighted_df, fraction ==
## "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.0180 -4.2707 -0.7356 3.6842 18.1340
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.787 4.868 4.475 0.00119 **
## mpd.obs.z -5.945 10.883 -0.546 0.59683
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.409 on 10 degrees of freedom
## Multiple R-squared: 0.02898, Adjusted R-squared: -0.06812
## F-statistic: 0.2984 on 1 and 10 DF, p-value: 0.5968
weight_invsimps_vs_mpd_plot <-
ggplot(weighted_df, aes(y = mean, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3.5, aes(shape = season)) + ylab("Inverse Simpson") +
scale_shape_manual(values = season_shapes) +
#xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-LITER PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(frac_bacprod ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.495 -9.462 -2.031 5.401 38.918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.225 5.711 1.615 0.120
## mpd.obs.z -21.146 13.195 -1.603 0.123
##
## Residual standard error: 14.72 on 22 degrees of freedom
## Multiple R-squared: 0.1045, Adjusted R-squared: 0.06383
## F-statistic: 2.568 on 1 and 22 DF, p-value: 0.1233
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.986 -4.879 -1.114 5.303 15.080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.601 4.042 1.138 0.281
## mpd.obs.z -15.374 9.673 -1.589 0.143
##
## Residual standard error: 7.725 on 10 degrees of freedom
## Multiple R-squared: 0.2016, Adjusted R-squared: 0.1218
## F-statistic: 2.526 on 1 and 10 DF, p-value: 0.1431
summary(lm(frac_bacprod ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free")))
##
## Call:
## lm(formula = frac_bacprod ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.530 -14.203 -3.927 7.777 31.965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.50 10.18 1.523 0.159
## mpd.obs.z -22.07 22.75 -0.970 0.355
##
## Residual standard error: 17.58 on 10 degrees of freedom
## Multiple R-squared: 0.08605, Adjusted R-squared: -0.005349
## F-statistic: 0.9415 on 1 and 10 DF, p-value: 0.3548
weight_prod_vs_mpd_plot <-
ggplot(weighted_df, aes(y = frac_bacprod, x = mpd.obs.z, fill = fraction)) +
geom_errorbar(aes(ymin = frac_bacprod - SD_frac_bacprod, ymax = frac_bacprod + SD_frac_bacprod, color = fraction), alpha = 0.7) + # X-axis errorbars
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3.5, aes(shape = season)) +
scale_shape_manual(values = season_shapes) +
ylab("Heterotrophic Production \n (μgC/L/hr)") +
xlab("Standardized Effect Size \n Weighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
theme(legend.title = element_blank(), legend.position = "none",
axis.text.x = element_blank(), axis.title.x = element_blank())
# Is there a relationship between PER-CELL PRODUCTION and WEIGHTED Mean Pairwise distance?
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = weighted_df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.94479 -0.39205 -0.05774 0.34127 1.39772
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.5104 0.2485 -30.222 <2e-16 ***
## mpd.obs.z -0.8987 0.5626 -1.597 0.125
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6073 on 21 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1083, Adjusted R-squared: 0.06587
## F-statistic: 2.551 on 1 and 21 DF, p-value: 0.1251
summary(lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64641 -0.31219 0.02897 0.23609 0.91465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.0955 0.2908 -24.400 1.56e-09 ***
## mpd.obs.z -0.9946 0.6678 -1.489 0.171
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5026 on 9 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.1977, Adjusted R-squared: 0.1086
## F-statistic: 2.218 on 1 and 9 DF, p-value: 0.1706
lm_percell_free_mpd <- lm(log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df, fraction == "Free"))
summary(lm_percell_free_mpd)
##
## Call:
## lm(formula = log10(fracprod_per_cell_noinf) ~ mpd.obs.z, data = filter(weighted_df,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53303 -0.19763 0.01741 0.18356 0.46425
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.9398 0.1992 -39.865 2.36e-12 ***
## mpd.obs.z -0.9408 0.4452 -2.113 0.0607 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.344 on 10 degrees of freedom
## Multiple R-squared: 0.3087, Adjusted R-squared: 0.2395
## F-statistic: 4.465 on 1 and 10 DF, p-value: 0.06074
weight_percell_vs_mpd_plot <-
ggplot(weighted_df,
aes(y = log10(fracprod_per_cell_noinf), x = mpd.obs.z, fill = fraction)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3.5, aes(shape = season)) +
scale_shape_manual(values = season_shapes) +
ylab("log10(Production/cell)\n (μgC/cell/hr)") +
xlab("Weighted Phylogenetic Diversity") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", data = filter(weighted_df, fraction == "Free"), color = "skyblue", fill = "skyblue", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
scale_y_continuous(limits = c(-8.5,-5), breaks = c(-8, -7, -6)) +
annotate("text", x = 0.65, y=-7.9, color = "skyblue", fontface = "bold",
label = paste("R2 =", round(summary(lm_percell_free_mpd)$adj.r.squared, digits = 3), "\n",
"p =", round(unname(summary(lm_percell_free_mpd)$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position ="none",
legend.text = element_text(size = 14))
row1_weighted_PD_plot <- plot_grid(weight_distribution_plot, weight_invsimps_vs_mpd_plot, weight_prod_vs_mpd_plot, weight_percell_vs_mpd_plot,
labels = c("A", "B", "C", "D"), ncol = 1, nrow = 4,
rel_heights = c(0.5, 0.8, 0.8, 1.2),
align = "v")
plot_grid(row1_weighted_PD_plot, season_legend,
ncol = 1, nrow = 2,
rel_heights = c(1, 0.05))

Cell counts and Unweighted SESmpd
# Combine the datasets into
cell_nums <- otu_alphadiv %>%
dplyr::select(norep_filter_name, fraction_bac_abund) %>%
distinct()
unweight_cellnums <- cell_nums %>%
left_join(unweighted_df, by = "norep_filter_name") %>%
dplyr::filter(norep_filter_name != "MOTEJ515")
# Is there a relationship between cell numbers and Unweighted Mean Pairwise distance?
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums)))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.4265 -0.2037 0.1391 0.3607 0.7260
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.2626 0.1134 46.397 < 2e-16 ***
## mpd.obs.z 0.5132 0.1362 3.767 0.00113 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5426 on 21 degrees of freedom
## Multiple R-squared: 0.4033, Adjusted R-squared: 0.3749
## F-statistic: 14.19 on 1 and 21 DF, p-value: 0.001131
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Particle")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Particle"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60003 -0.11678 0.05996 0.23219 0.35154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.6124 0.1147 40.212 1.81e-11 ***
## mpd.obs.z 0.0637 0.1086 0.587 0.572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3152 on 9 degrees of freedom
## Multiple R-squared: 0.03682, Adjusted R-squared: -0.0702
## F-statistic: 0.344 on 1 and 9 DF, p-value: 0.5719
summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums, fraction == "Free")))
##
## Call:
## lm(formula = log10(fraction_bac_abund) ~ mpd.obs.z, data = filter(unweight_cellnums,
## fraction == "Free"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29232 -0.07382 -0.01386 0.12210 0.21771
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.76631 0.07691 74.974 4.35e-15 ***
## mpd.obs.z 0.15953 0.13886 1.149 0.277
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.169 on 10 degrees of freedom
## Multiple R-squared: 0.1166, Adjusted R-squared: 0.02825
## F-statistic: 1.32 on 1 and 10 DF, p-value: 0.2774
ggplot(unweight_cellnums, aes(y = log10(fraction_bac_abund), x = mpd.obs.z)) +
geom_vline(xintercept = 0, linetype = "dashed", size = 1.5) +
geom_point(size = 3, shape = 21, aes(fill = fraction)) +
ylab("log10(bacterial cells/mL)") +
xlab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = fraction_colors) +
geom_smooth(method = "lm", color = "#424645", fill = "#424645", alpha = 0.3) +
scale_x_continuous(limits = c(-1.5,1.75), breaks = seq(from = -1.5, to = 1.5, by = 0.5)) +
annotate("text", x = 0.75, y=4.25, color = "#424645", fontface = "bold",
label = paste("R2 =", round(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$adj.r.squared, digits = 2), "\n",
"p =", round(unname(summary(lm(log10(fraction_bac_abund) ~ mpd.obs.z, data = unweight_cellnums))$coefficients[,4][2]), digits = 3))) +
theme(legend.title = element_blank(), legend.position = c(0.12, 0.9))

# Test by station
part_unweighted_df <- filter(unweighted_df, fraction == "Particle")
kruskal.test(mpd.obs.z ~ lakesite, data = filter(unweighted_df, fraction == "Particle"))
unweighted_df %>%
filter(fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mpd.obs.z), sd(mpd.obs.z))
# Lakesite
plot_part_unweight_lakesite <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = lakesite)) +
ggtitle("Particle-Associated Samples Only") +
scale_fill_manual(values = lakesite_colors) +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
geom_jitter(size = 3, shape = 21, aes(fill = lakesite), width = 0.2) +
geom_boxplot(aes(fill = lakesite), alpha = 0.5) +
theme(axis.title.x = element_blank(),
legend.position = c(0.85, 0.9), legend.title = element_blank())
# Season
plot_part_unweight_season <- ggplot(filter(unweighted_df, fraction == "Particle"),
aes(y = mpd.obs.z, x = season)) +
ggtitle("Particle-Associated Samples Only") +
ylab("Standardized Effect Size \n Unweighted Mean Pairwise Dist") +
scale_fill_manual(values = season_colors) +
geom_jitter(size = 3, shape = 21, aes(fill = season), width = 0.2) +
geom_boxplot(aes(fill = season), alpha = 0.5) +
theme(axis.title.x = element_blank(), axis.title.y = element_blank(),
legend.position = c(0.9, 0.9), legend.title = element_blank())
plot_grid(plot_part_unweight_lakesite, plot_part_unweight_season,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")
plot_station_rich <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Particle Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
plot_station_invsimps <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Particle"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Particle Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Particle") %>%
group_by(lakesite) %>%
summarize(mean(mean))
plot_grid(plot_station_rich, plot_station_invsimps,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")
#### FREE LIVING
plot_station_rich_FL <-
ggplot(filter(otu_alphadiv, measure == "Richness" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ylab("Observed Richness") +
ggtitle("Free-Living Fraction Only") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = c(0.12, 0.9))
otu_alphadiv %>%
filter(measure == "Richness" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean), sd(mean))
plot_station_invsimps_FL <-
ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson" & fraction == "Free"),
aes(x = lakesite, y = mean, fill = lakesite)) +
geom_jitter(size = 3, shape = 21) +
ggtitle("Free-Living Fraction Only") +
ylab("Inverse Simpson") +
geom_boxplot(outlier.shape = NA, color = "black", alpha = 0.5) +
scale_fill_manual(values = lakesite_colors) +
theme(legend.title = element_blank(),
legend.position = "none")
otu_alphadiv %>%
filter(measure == "Inverse_Simpson" & fraction == "Free") %>%
group_by(lakesite) %>%
summarize(mean(mean))
plot_grid(plot_station_rich_FL, plot_station_invsimps_FL,
labels = c("A", "B"), ncol = 2, nrow = 1, align = "h")